home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
C/C++ Users Group Library 1996 July
/
C-C++ Users Group Library July 1996.iso
/
vol_400
/
432_01
/
wf120
/
match.c
< prev
next >
Wrap
C/C++ Source or Header
|
1992-01-06
|
19KB
|
579 lines
/*
EPSHeader
File: match.c
Author: J. Kercheval
Created: Sat, 01/05/1991 22:21:49
*/
/*
EPSRevision History
J. Kercheval Wed, 02/20/1991 22:29:01 Released to Public Domain
J. Kercheval Fri, 02/22/1991 15:29:01 fix '\' bugs (two :( of them)
J. Kercheval Sun, 03/10/1991 19:31:29 add error return to matche()
J. Kercheval Sun, 03/10/1991 20:11:11 add is_valid_pattern code
J. Kercheval Sun, 03/10/1991 20:37:11 beef up main()
J. Kercheval Tue, 03/12/1991 22:25:10 Released as V1.1 to Public Domain
J. Kercheval Thu, 03/14/1991 22:22:25 remove '\' for DOS file parsing
J. Kercheval Mon, 05/13/1991 21:49:05 ifdef full match code
J. Kercheval Mon, 01/06/1992 21:31:44 add match character defines
*/
/*
* Wildcard Pattern Matching
*/
#include "match.h"
/*
 * character defines
 *
 * Symbolic names for the characters that carry special meaning in a
 * pattern, so the matching code does not embed bare character literals.
 */
/* wildcard for any one character */
#define MATCH_CHAR_SINGLE '?'
/* wildcard for any sequence of characters (zero or more) */
#define MATCH_CHAR_KLEENE_CLOSURE '*'
/* opens a [..] set construct */
#define MATCH_CHAR_RANGE_OPEN '['
/* separates the two ends of a range inside a set (as in 0-9) */
#define MATCH_CHAR_RANGE '-'
/* closes a [..] set construct */
#define MATCH_CHAR_RANGE_CLOSE ']'
/* escape character: the character following it is taken literally */
#define MATCH_CHAR_LITERAL '\\'
/* string terminator */
#define MATCH_CHAR_NULL '\0'
/* either of the next two, placed first in a set, negates the set */
#define MATCH_CHAR_CARAT_NEGATE '^'
#define MATCH_CHAR_EXCLAMATION_NEGATE '!'
/*
 * forward function prototypes
 *
 * NOTE(review): the definitions are not visible in this part of the file;
 * judging by the names these presumably continue a match after a '*' has
 * been seen in the pattern -- confirm against the definitions.
 */
int matche_after_star(register char *pattern, register char *text);
int fast_match_after_star(register char *pattern, register char *text);
/*----------------------------------------------------------------------------
*
* Report whether the string P contains at least one wildcard metacharacter.
*
* Returns TRUE as soon as a '?', '*' or '[' is found.  Unless FILE_MATCH
* is defined, the escape character '\\' also counts as a metacharacter.
* Returns FALSE when the end of the string is reached without finding one
* (an empty string therefore yields FALSE).
*
---------------------------------------------------------------------------*/
BOOLEAN is_pattern(char *p)
{
    const char *scan;

    /* walk the string one character at a time up to the terminator */
    for (scan = p; *scan; scan++) {
        const char ch = *scan;

        if (ch == MATCH_CHAR_SINGLE ||
            ch == MATCH_CHAR_KLEENE_CLOSURE ||
            ch == MATCH_CHAR_RANGE_OPEN) {
            return TRUE;
        }
#ifndef FILE_MATCH
        /* the escape character is only special outside file matching */
        if (ch == MATCH_CHAR_LITERAL) {
            return TRUE;
        }
#endif
    }

    /* nothing special found */
    return FALSE;
}
/*----------------------------------------------------------------------------
*
* Return TRUE if the pattern P is well formed according to the pattern
* syntax accepted by this module, FALSE otherwise.
*
* error_type receives a code describing the first problem found, or
* PATTERN_VALID when the pattern is well formed:
*
* PATTERN_VALID - pattern is well formed
* PATTERN_ESC   - escape character is the last character of the pattern.
*                 Outside a [..] construct this is only checked when
*                 FILE_MATCH is not defined; inside a [..] construct it
*                 is always checked.
* PATTERN_RANGE - [..] construct has no end of range after a '-' (ie [a-])
* PATTERN_CLOSE - [..] construct has no end bracket (ie [abc-g )
* PATTERN_EMPTY - [..] construct is empty (ie [])
*
* The end of the pattern is tested at the top of every pass through a
* [..] construct.  This matters for an unterminated set that stops right
* after a range (ie "[a-z"): the scan must report PATTERN_CLOSE there
* rather than step over the string terminator.
*
---------------------------------------------------------------------------*/
BOOLEAN is_valid_pattern(char *p, int *error_type)
{
    /* init error_type: the pattern is good until proven otherwise */
    *error_type = PATTERN_VALID;

    /* loop through pattern to EOS */
    while (*p) {

        /* determine pattern type */
        switch (*p) {

#ifndef FILE_MATCH
        /* check literal escape, it cannot be at end of pattern */
        case MATCH_CHAR_LITERAL:
            if (!*++p) {
                *error_type = PATTERN_ESC;
                return FALSE;
            }
            /* skip the escaped character itself */
            p++;
            break;
#endif

        /* the [..] construct must be well formed */
        case MATCH_CHAR_RANGE_OPEN:
            p++;

            /* if the next character is ']' then bad pattern */
            if (*p == MATCH_CHAR_RANGE_CLOSE) {
                *error_type = PATTERN_EMPTY;
                return FALSE;
            }

            /* loop to end of [..] construct */
            while (*p != MATCH_CHAR_RANGE_CLOSE) {

                /*
                 * if end of pattern here then bad pattern; testing this
                 * before consuming anything keeps p from ever moving
                 * past the string terminator
                 */
                if (!*p) {
                    *error_type = PATTERN_CLOSE;
                    return FALSE;
                }

                /* consume one set member, honouring a literal escape */
                if (*p == MATCH_CHAR_LITERAL) {
                    p++;

                    /* escape with nothing after it is a bad pattern */
                    if (!*p) {
                        *error_type = PATTERN_ESC;
                        return FALSE;
                    }
                }
                p++;

                /* if this is a range */
                if (*p == MATCH_CHAR_RANGE) {
                    p++;

                    /* we must have an end of range */
                    if (!*p || *p == MATCH_CHAR_RANGE_CLOSE) {
                        *error_type = PATTERN_RANGE;
                        return FALSE;
                    }

                    /* the end of range may itself be escaped */
                    if (*p == MATCH_CHAR_LITERAL) {
                        p++;

                        /* escape with nothing after it is a bad pattern */
                        if (!*p) {
                            *error_type = PATTERN_ESC;
                            return FALSE;
                        }
                    }

                    /* step over the end of range character */
                    p++;
                }
            }

            /*
             * p is left on the closing ']'; the next pass of the outer
             * loop steps over it as an ordinary character
             */
            break;

        /* all other characters are valid pattern elements */
        case MATCH_CHAR_KLEENE_CLOSURE:
        case MATCH_CHAR_SINGLE:
        default:                /* "normal" character */
            p++;
            break;
        }
    }

    return TRUE;
}
/*----------------------------------------------------------------------------
*
* Match the pattern PATTERN against the string TEXT;
*
* returns MATCH_VALID if pattern matches, or an errorcode as follows
* otherwise:
*
* MATCH_PATTERN - bad pattern
#ifndef FILE_MATCH
* MATCH_LITERAL - match failure on literal mismatch
#endif
* MATCH_RANGE - match failure on [..] construct
* MATCH_ABORT - premature end of text string
* MATCH_END - premature end of pattern string
* MATCH_VALID - valid match
*
*
* A match means the entire string TEXT is used up in matching.
*
* In the pattern string:
* `*' matches any sequence of characters (zero or more)
* `?' matches any single character
* [SET] matches any character in the specified set,
* [!SET] or [^SET] matches any character not in the specified set.
* \ is allowed within a set to escape a character like ']' or '-'
*
* A set is composed of characters or ranges; a range looks like character
* hyphen character (as in 0-9 or A-Z). [0-9a-zA-Z_] is the minimal set of
* characters allowed in the [..] pattern construct. Other characters are
* allowed (ie. 8 bit characters) if your system will support them.
*
* To suppress the special syntactic significance of any of `[]*?!^-\', and
* match the character exactly, precede it with a `\'.
*
---------------------------------------------------------------------------*/
int matche(register char *p, register char *t)
{
register char range_start, range_end; /* start and end in range */
BOOLEAN invert; /* is this [..] or [!..] */
BOOLEAN member_match; /* have I matched the [..] construct? */
BOOLEAN loop; /* should I terminate? */
for (; *p; p++, t++) {
/* if this is the end of the text then this is the end of the match */
if (!*t) {
return (*p == MATCH_CHAR_KLEENE_CLOSURE &&
*++p == MATCH_CHAR_NULL) ?
MATCH_VALID : MATCH_ABORT;
}
/* determine and react to pattern type */
switch (*p) {